
cap log close
log using "$logs/cr-clean-ocr.log", replace

***** BASED OFF cr-clean-ocr-v4


********************************************************************************
******************2011-12*******************************************************
********************************************************************************

// import files (unlike later years, 2011-12 has multiple files)

foreach file in ///
	"01-LEA Form" ///
	"02 - School Characteristics" ///
	"05 - Overall Enrollment" ///
	"35-2 - Students WO Disab Receiving one or more in-school suspensions" ///
	"35-3 - Students WO Disab Receiving only one out-of-school suspension" ///
	"35-4 - Students WO Disab Rec more than one out-of-school suspension" ///
	"36-2 - Students With Disabilities Receiving one or more in-school suspensions" ///
	"36-3 - Students With Disabilities Receiving only one out-of-school suspension" ///
	"36-4 - Students With Disab Receiving more than one out-of-school suspension" 	{ 
	
		//cd "${rawocr}\2011-12" 
		// import excel using "`file'", firstrow clear
		
		import excel using "$raw/ocr/2011-12/`file'.xlsx", firstrow clear
		
		save "$data/`file'", replace
		}

// merge files
		
use "${data}/02 - School Characteristics" , clear
merge m:1 LEAID using "${data}/01-LEA Form", gen(merge1)

local i=2
foreach file in ///
	"05 - Overall Enrollment" ///
	"35-2 - Students WO Disab Receiving one or more in-school suspensions" ///
	"35-3 - Students WO Disab Receiving only one out-of-school suspension" ///
	"35-4 - Students WO Disab Rec more than one out-of-school suspension" ///
	"36-2 - Students With Disabilities Receiving one or more in-school suspensions" ///
	"36-3 - Students With Disabilities Receiving only one out-of-school suspension" ///
	"36-4 - Students With Disab Receiving more than one out-of-school suspension" 	{ 
		
		merge 1:1 LEAID SCHID using "$data/`file'", gen(merge`i') 
		local ++i
		
		}

su merge*
drop merge*

/***************************************************************************
	rename vars to match var names in later year data files
**************************************************************************/

// total enrollments
rename M_BLA_7_ENROL	SCH_ENR_BL_M
rename F_BLA_7_ENROL	SCH_ENR_BL_F
rename M_WHI_7_ENROL	SCH_ENR_WH_M
rename F_WHI_7_ENROL	SCH_ENR_WH_F

// students w/out disabilities receiving only 1 out of sch susp
rename	M_BLA_7_SINGLE_SUS_NO_DIS	SCH_DISCWODIS_SINGOOS_BL_M
rename	F_BLA_7_SINGLE_SUS_NO_DIS	SCH_DISCWODIS_SINGOOS_BL_F
rename	M_WHI_7_SINGLE_SUS_NO_DIS	SCH_DISCWODIS_SINGOOS_WH_M
rename	F_WHI_7_SINGLE_SUS_NO_DIS	SCH_DISCWODIS_SINGOOS_WH_F

// sts w/ disabilities receiving only 1 out of sch susp
rename	M_BLA_7_SINGLE_SUS_DIS	SCH_DISCWDIS_SINGOOS_IDEA_BL_M
rename	F_BLA_7_SINGLE_SUS_DIS	SCH_DISCWDIS_SINGOOS_IDEA_BL_F
rename	M_WHI_7_SINGLE_SUS_DIS	SCH_DISCWDIS_SINGOOS_IDEA_WH_M
rename	F_WHI_7_SINGLE_SUS_DIS	SCH_DISCWDIS_SINGOOS_IDEA_WH_F

// students w/ disabilities receiving more than 1 out of sch susp
rename	M_BLA_7_MULT_SUS_DIS	SCH_DISCWDIS_MULTOOS_IDEA_BL_M
rename	F_BLA_7_MULT_SUS_DIS	SCH_DISCWDIS_MULTOOS_IDEA_BL_F
rename	M_WHI_7_MULT_SUS_DIS	SCH_DISCWDIS_MULTOOS_IDEA_WH_M
rename	F_WHI_7_MULT_SUS_DIS	SCH_DISCWDIS_MULTOOS_IDEA_WH_F

// students w/out disabilities receiving more than 1 out of sch susp
rename	M_BLA_7_MULT_SUS_NO_DIS	SCH_DISCWODIS_MULTOOS_BL_M
rename	F_BLA_7_MULT_SUS_NO_DIS	SCH_DISCWODIS_MULTOOS_BL_F
rename	M_WHI_7_MULT_SUS_NO_DIS	SCH_DISCWODIS_MULTOOS_WH_M
rename	F_WHI_7_MULT_SUS_NO_DIS	SCH_DISCWODIS_MULTOOS_WH_F

// students w/ disabilities receiving one or more in-school susp
rename	M_BLA_7_IN_SCH_SUS_DIS	SCH_DISCWDIS_ISS_IDEA_BL_M
rename	F_BLA_7_IN_SCH_SUS_DIS	SCH_DISCWDIS_ISS_IDEA_BL_F
rename	M_WHI_7_IN_SCH_SUS_DIS	SCH_DISCWDIS_ISS_IDEA_WH_M
rename	F_WHI_7_IN_SCH_SUS_DIS	SCH_DISCWDIS_ISS_IDEA_WH_F

// sts w/out disabilities receiving one or more in-sch susp
rename	M_BLA_7_IN_SCH_SUS_NO_DIS	SCH_DISCWODIS_ISS_BL_M
rename	F_BLA_7_IN_SCH_SUS_NO_DIS	SCH_DISCWODIS_ISS_BL_F
rename	M_WHI_7_IN_SCH_SUS_NO_DIS	SCH_DISCWODIS_ISS_WH_M
rename	F_WHI_7_IN_SCH_SUS_NO_DIS	SCH_DISCWODIS_ISS_WH_F


keep LEAID SCHID *ISS*BL* *ISS*WH* *OOS*BL* *OOS*WH* *ENR_BL* *ENR_WH* PreK

destring *ISS*BL* *ISS*WH* *OOS*BL* *OOS*WH* *ENR_BL* *ENR_WH*, force replace // some have "‡" symbol, which is not same as 0

gen schendyr=2012
lab var schendyr "End of school year (2011-12=2012)"

su

compress
save "$data/1112_schlevel_panel.dta", replace

su *ISS*BL* *ISS*WH* *OOS*BL* *OOS*WH* *ENR_BL* *ENR_WH*

unique LEAID
collapse (sum) *ISS*BL* *ISS*WH* *OOS*BL* *OOS*WH* *ENR_BL* *ENR_WH*, by(LEAID) cw // use cw deletion
unique LEAID // 28 dropped for casewise; get list of them
/*
preserve
	merge 1:m LEAID using "$data/1112_schlevel_panel.dta"
	keep if _merge==2 // these are the 28 LEAs that got dropped for cw
	keep LEAID *ISS*BL* *ISS*WH* *OOS*BL* *OOS*WH* *ENR_BL* *ENR_WH*
	export excel using "$output/sample-tracking.xlsx", sheet(ocr2012) sheetreplace firstrow(var)
	duplicates drop LEAID, force
	export excel LEAID using "$output/sample-tracking.xlsx", sheet(ocr2012lea) sheetreplace firstrow(var)
restore
*/

gen schendyr=2012
lab var schendyr "End of school year (2011-12=2012)"

ren *, lower
ren sch_* *

compress
save "$data/1112_lealevel_panel.dta", replace

********************************************************************************
******************2013-14*******************************************************
********************************************************************************

clear
set excelxlsxlargefile on

// import data files
foreach file in ///
	"01 LEA Characteristics" ///
	"01 School Characteristics" ///
	"03 Enrollment" ///
	"11-2 Suspensions (required elements)"  {
		
		import excel using "$raw/ocr/2013-14/`file'.xlsx", firstrow clear
		
		save "$data/`file'", replace
		}

// merge data files
use 	"$data/01 LEA Characteristics", clear 
merge 1:m LEAID using "$data/01 School Characteristics" , gen(merge2_1)

local i=2
foreach file in ///
	"03 Enrollment" ///
	"11-2 Suspensions (required elements)" {
	
		merge 1:1 LEAID SCHID using "$data/`file'" , gen(merge2_`i')
		local ++i
	
		}


su merge*
drop merge*

tab SCH_GRADE_PS, m
bys SCH_GRADE_PS: su *PS*

keep LEAID SCHID SCH_NAME *ISS*BL* *ISS*WH* *OOS*BL* *OOS*WH* *ENR_BL* *ENR_WH*

su *ISS*BL* *ISS*WH* *OOS*BL* *OOS*WH* *ENR_BL* *ENR_WH*

// drop those w/ data system overload flags (indicates unresolved error)
	drop if DSO_SCH_ENR_BL_M ==1 ///
		| DSO_SCH_ENR_BL_F==1 ///
		| DSO_SCH_ENR_WH_M ==1 ///
		| DSO_SCH_ENR_WH_F ==1
	

drop DSO* // data system overload flags
drop *IDEAENR* *LEPENR* *504ENR* *PROGENR* // lots of missing value codes, and included in total enrollments 
	

su

// this is the lea w/ all the 0s - they're in the original excel file: st. lucie fla
// fla was state w/ errors needing updates
list LEAID  SCH_DISCWODIS_ISS*  SCH_DISCWDIS_ISS* SCH_ENR_BL* SCH_ENR_WH* if LEAID=="1201770"

/* from data anomaly key:

-2 Small Cell Value 

 A reserve code of -5 is used to indicate that an LEA is unable to report all 
 required data elements in the current collection, but will be able to report 
 those data elements in the future (action plan).

 A data element that should have been reported (but was not) is identified 
 with a -7 in the data file.
 
 A reserve code of -9 is used to indicate that a data element is not applicable 
 (and should be skipped) based on a response to a precursor question. 
*/


foreach var of varlist *ISS*BL* *ISS*WH* *OOS*BL* *OOS*WH* *ENR_BL* *ENR_WH* {
	replace `var'=. if inlist(`var', -2, -5, -7)
	replace `var'=0 if `var'==-9 // when combine presch susp vars to non-presch susp vars, 
			// -9 for presch susp should be treated as 0
	}
	
su

gen schendyr=2014
lab var schendyr "End of school year (2011-12=2012)"

compress
save "$data/1314_schlevel_panel.dta", replace

unique LEAID
collapse (sum) *ISS*BL* *ISS*WH* *OOS*BL* *OOS*WH* *ENR_BL* *ENR_WH*, by(LEAID) cw // use cw deletion
unique LEAID // 10 dropped from cw

/*
preserve
	merge 1:m LEAID using "$data/1314_schlevel_panel.dta"
	keep if _merge==2 // these are the 10 LEAs that got dropped for cw
	keep LEAID *ISS*BL* *ISS*WH* *OOS*BL* *OOS*WH* *ENR_BL* *ENR_WH*
	export excel using "$output/sample-tracking.xlsx", sheet(ocr2014) sheetreplace firstrow(var)
	duplicates drop LEAID, force
	export excel LEAID using "$output/sample-tracking.xlsx", sheet(ocr2014lea) sheetreplace firstrow(var)
restore
*/

gen schendyr=2014
lab var schendyr "End of school year (2011-12=2012)"
	
su 

ren *, lower
ren sch_* *
	
compress	
save "$data/1314_lealevel_panel.dta", replace

********************************************************************************
******************2015-16*******************************************************
********************************************************************************

clear

insheet using "$raw/ocr/2015-16/CRDC 2015-16 School Data.csv",  clear c
save "$data/CRDC 2015-16 School Data", replace

ren *, upper

keep LEAID SCHID SCH_NAME *ISS*BL* *ISS*WH* *OOS*BL* *OOS*WH* *_ENR_BL* *_ENR_WH*
drop *DAYS*

su

foreach var of varlist *ISS*BL* *ISS*WH* *OOS*BL* *OOS*WH* *ENR_BL* *ENR_WH* {
	replace `var'=. if inlist(`var', -2, -5, -7)
	replace `var'=0 if `var'==-9
	}
	
gen schendyr=2016
lab var schendyr "End of school year (2011-12=2012)"

su

compress
save "$data/1516_schlevel_panel.dta", replace

unique LEAID
collapse (sum) *ISS*BL* *ISS*WH* *OOS*BL* *OOS*WH* *ENR_BL* *ENR_WH*, by(LEAID) cw // use cw deletion
unique LEAID // 2 dropped for cw

/*
preserve
	merge 1:m LEAID using "$data/1516_schlevel_panel.dta"
	keep if _merge==2 // these are the 2 LEAs that got dropped for cw
	keep LEAID *ISS*BL* *ISS*WH* *OOS*BL* *OOS*WH* *ENR_BL* *ENR_WH*
	export excel using "$output/sample-tracking.xlsx", sheet(ocr2016) sheetreplace firstrow(var)
	duplicates drop LEAID, force
	export excel LEAID using "$output/sample-tracking.xlsx", sheet(ocr2016lea) sheetreplace firstrow(var)
restore
*/

gen schendyr=2016
lab var schendyr "End of school year (2011-12=2012)"
	
su 

ren *, lower
ren sch_* *

compress
save "$data/1516_lealevel_panel.dta", replace


/******************************************************************************
	append files 
******************************************************************************/

append using "$data/1112_lealevel_panel.dta"
append using "$data/1314_lealevel_panel.dta"

compress

isid leaid schendyr
save "$data/1216_lealevel_panel.dta", replace

log close
